<?php

namespace StudyBuddy;

/**
 * Tokenizer for the title of a single question.
 *
 * Splits the title on whitespace and a few punctuation characters,
 * then drops one-character tokens, stopwords and duplicates.
 */
class TitleTokenizer extends \StudyBuddy\String\Tokenizer {

    /**
     * Create a tokenizer for the given title.
     *
     * The title is lower-cased and trimmed, and the resulting
     * plain string value is passed to the constructor.
     *
     * @param Utf8String $str title of the question
     *
     * @return TitleTokenizer
     */
    public static function factory(Utf8String $str) {

        return new self($str->toLowerCase()->trim()->valueOf());
    }

    /**
     * Parse title of the question by
     * tokenizing it
     * Overrides parent's parse and uses mb_split
     * instead of preg_split to be UTF-8 safe
     * because title can be a UTF-8 string
     *
     * Tokens are separated by any run of whitespace, comma,
     * semicolon, double quote or question mark.
     * Tokens shorter than two characters (counted as UTF-8
     * characters, not bytes), tokens found in the stopwords list
     * and repeated tokens are not included in the result.
     *
     * @return array unique tokens in order of first appearance,
     * indexed sequentially from 0; empty array if the title is empty
     * or could not be split
     */
    public function parse() {

        if (empty($this->origString)) {
            d('string was empty, returning empty array');
            return array();
        }

        \mb_regex_encoding('UTF-8');
        $aParts = \mb_split('([\s,;\"\?]+)', $this->origString);

        /**
         * mb_split() returns false on failure,
         * there is nothing to tokenize in that case
         */
        if (!\is_array($aParts)) {
            return array();
        }

        $aStopwords = getStopwords();

        $aTokens = array();
        $aSeen = array();

        foreach ($aParts as $val) {
            $val = \trim($val);

            /**
             * Count characters, not bytes: a single multi-byte
             * letter must be treated the same way as
             * a single ASCII letter
             */
            if (\mb_strlen($val, 'UTF-8') < 2) {
                continue;
            }

            /**
             * Duplicates are detected after trimming so that
             * tokens that become equal only once trimmed
             * are still collapsed into one
             */
            if (isset($aSeen[$val]) || in_array($val, $aStopwords)) {
                continue;
            }

            $aSeen[$val] = true;

            /**
             * Appending keeps the result indexed from 0
             * without gaps, so that Mongo will
             * treat this as normal array
             */
            $aTokens[] = $val;
        }

        return $aTokens;
    }

}
